
/*

This program creates all of the tables, figures, and numbers reported in the texts of:

  - Sumit Agarwal, Shin-fen Cheng, Jussi Keppo, Long Wang, and Yang Yang. March 2025. "Information Provision and Search Frictions: Evidence from the Taxi Industry in Singapore" 

Key variables: 

	treat: equal to 1 for trips involving passenger pickups from any of the four terminals at Changi Airport, and 0 otherwise;
	after: equal to 1 for trips after the pre-treatment period (which spans January 1, 2009, to December 24, 2009), and 0 otherwise;
    treat_after: an interaction of treat and after;
	deadheading_speed: defined as deadheading distance divided by deadheading time;
	deadheading_time: time difference between the previous drop-off time and current pick-up time;
	trip_speed: defined as trip distance divided by trip time;
	trip_distance: distance between the current pick-up and next drop-off locations;
	trip_time: time difference between the current pick-up time and next drop-off time;
	trip_fare: the cost of the current taxi ride in Singapore dollars;
	driverid: the id of the taxi driver;
	taxi_num_id: the id of the taxi;
	previous_dropoff_district: the district code for previous drop off location;
	start_district: the district code for current pick up location;
	end_district: the district code for next drop off location;
	year: the year of the trip;
	month: the month of the trip;
	day: the day-of-month of the trip;
	ym: the year-month of the trip;
	date: the date of the trip;
	hour: the hour-of-day of the trip;
	terminal: the pick-up terminal of the trip;
	gap_arrival: the absolute difference between the hourly count of flight arrivals and the hourly taxi pick-ups;
	gap_departure: the absolute difference between the hourly count of flight departures and the hourly taxi drop-offs;
	gap_arrival_lag: the absolute difference between the lagged hourly count of flight arrivals and the hourly taxi pick-ups;
	gap_departure_lag: the absolute difference between the hourly count of flight departures and the lagged hourly taxi drop-offs;
		
	
*/

        

***************************************
******	Table 1: Summary Statistics of the Randomized Sample
***************************************  

* Build the analysis sample: every airport-pickup trip (treat==1) plus a 2%
* random draw of the non-airport trips (treat==0).
use ReSTATs_replication_data_full, clear

* Fix the random-number seed so the 2% draw is reproducible across runs.
* NOTE(review): the original program drew the sample without a seed, so this
* value is arbitrary; replace it if the seed behind the published draw is known.
set seed 20250301

* Control group: restrict to non-airport trips BEFORE sampling.
* `sample # if exp` keeps every observation that fails the if-condition, so
* sampling with `if treat==0` on the full data would leave all airport trips in
* control.dta, and the append below would then duplicate them.
preserve
keep if treat==0
sample 2
save control.dta, replace
restore

* Treated group: all airport pickups.
preserve
keep if treat==1
save treat.dta, replace
restore

* Stack the two groups into the working sub-sample used by later sections.
use treat, clear
append using control.dta
save ReSTATs_replication_data_sub, replace

** Trip-level summary statistics of the key variables, by treat x after cell
use ReSTATs_replication_data_sub, clear

* Variables reported in Table 1, in the column order used for the output.
local sumvars treat after deadheading_speed deadheading_time trip_speed trip_distance trip_time trip_fare

preserve
keep `sumvars'
order `sumvars'
* Rescale the fare by 1/100 before tabulating.
replace trip_fare = trip_fare/100
bysort treat after: outreg2 using table1.doc, replace dec(2) sum(detail) eqkeep(N mean sd)
restore

***************************************
******	Table 2: Estimation Results on Deadheading Behavior
*************************************** 

use ReSTATs_replication_data_sub, clear

* Absorbed fixed effects and outcomes shared by every Table 2 specification;
* standard errors are clustered by taxi throughout.
local fe driverid date hour previous_dropoff_district start_district end_district
local outcomes ln_deadheading_speed ln_deadheading_time

******  Panel A: Baseline Results
foreach y of local outcomes {
	reghdfe `y' treat_after, abs(`fe') vce(cluster taxi_num_id)
}

******  Panel B: Baseline Results - Excluding Dec. 2009 (observations with ym==599 are dropped)
foreach y of local outcomes {
	reghdfe `y' treat_after if ym!=599, abs(`fe') vce(cluster taxi_num_id)
}

***************************************
******	Table 3: Estimation Results on Deadheading Behavior - Subsample Analysis
*************************************** 

use ReSTATs_replication_data_sub, clear

* Same fixed effects and clustering as the baseline specification.
local fe driverid date hour previous_dropoff_district start_district end_district

* Deadheading speed: restrict to trips below successively tighter speed caps.
foreach ub of numlist 10 5 3 {
	reghdfe ln_deadheading_speed treat_after if deadheading_speed<`ub', abs(`fe') vce(cluster taxi_num_id)
}

* Deadheading time: restrict to trips above successively higher time floors.
foreach lb of numlist 60 90 120 {
	reghdfe ln_deadheading_time treat_after if deadheading_time>`lb', abs(`fe') vce(cluster taxi_num_id)
}

***************************************
******	Table 4: Underlying Mechanism: A Reduced Mismatch at the Airport
*************************************** 

use ReSTATs_replication_data_flights_taxi.dta, clear

** Absolute gap between hourly flight counts and hourly taxi counts at each terminal.
** NOTE(review): judging by the variable names, gap_arrival_lag lags the taxi series
** and gap_departure_lag lags the flight series -- confirm against the header's
** variable descriptions.
generate gap_arrival       = abs(air_volume_arrival_terminal - taxi_vol_terminal_arr)
generate gap_departure     = abs(air_volume_dep_terminal - taxi_vol_terminal_dep)
generate gap_arrival_lag   = abs(air_volume_arrival_terminal - taxi_vol_terminal_arr_lag)
generate gap_departure_lag = abs(air_volume_dep_terminal_lag - taxi_vol_terminal_dep)

** Arrival gap recomputed with the flight count scaled by three transition ratios
** (50, 75, 100), stored as gap_arrival1-gap_arrival3.
local k = 0
foreach ratio of numlist 50 75 100 {
	local ++k
	generate gap_arrival`k' = abs(air_volume_arrival_terminal*`ratio' - taxi_vol_terminal_arr)
}

* Fixed effects and clustering common to every Table 4 regression.
local spec abs(terminal year month day hour) vce(cluster terminal)

******  Panel A: The Impact on Taxi Mismatch at Terminals
foreach y in gap_arrival gap_departure gap_arrival_lag gap_departure_lag {
	reghdfe `y' after, `spec'
}

******  Panel B: Estimations with Different Transition Ratios
foreach y in gap_arrival1 gap_arrival2 gap_arrival3 {
	reghdfe `y' after, `spec'
}

***************************************
******	Table 5: Changes in Terminal Selection and Timing of Visits
*************************************** 

** Generate a taxi-by-month concentration index (HHI) of airport visits and regress
** it on the 2010 indicator. The loop runs twice:
**   start_loc -> HHI across pick-up locations  (Column 1)
**   hour      -> HHI across hours of the day   (Column 2)
foreach dim in start_loc hour {

	use ReSTATs_replication_data_sub, clear
	keep if treat==1

	* Visits per taxi-month, and per taxi-month-cell of the chosen dimension.
	bysort taxi_num_id year_month: generate month_airport_visit = _N
	bysort taxi_num_id `dim' year_month: generate month_cell_visit = _N

	* One row per taxi-month-cell, then the cell's share of the taxi's monthly visits.
	duplicates drop taxi_num_id `dim' year_month, force
	generate market_share  = month_cell_visit/month_airport_visit
	generate market_share2 = market_share*market_share

	* HHI = sum of squared shares within taxi-month; collapse to one row per taxi-month.
	bysort taxi_num_id year_month: egen HHI = total(market_share2)
	duplicates drop taxi_num_id year_month, force

	* after1 is 1 for observations in 2010 and 0 for everything else.
	generate after1 = (year==2010)

	****** Estimation results of Column (1) [start_loc] / Column (2) [hour]
	reghdfe HHI after1, abs(taxi_num_id month) vce(cluster taxi_num_id)
}

***************************************
******	Table 6: Estimation Results of Increased Inclination and Increased Visits
*************************************** 

******  Panel A: Inclination of Going to the Airport

use ReSTATs_replication_data_full, clear

* Linear probability models of an airport pickup (treat) on the post indicator.
* The absorbed variable is spelled out in full as end_district: the truncated
* form end_distric only resolves through variable abbreviation, fails under
* `set varabbrev off`, and is inconsistent with every other specification.
reghdfe treat after, abs(driverid year month day hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)
reghdfe treat after after_CBD after_Adjacent, abs(driverid year month day hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)


******  Panel B: Driver's Daily Frequency of Visiting the Airport

* Same specification estimated on the unbalanced and then the balanced daily panel.
use ReSTATs_replication_data_daily_unbalanced.dta, clear
reghdfe ln_airport_daily_visit after, abs(driverid year month day) vce(cluster taxi_num_id)

use ReSTATs_replication_data_daily_balanced.dta, clear
reghdfe ln_airport_daily_visit after, abs(driverid year month day) vce(cluster taxi_num_id)

***************************************
******	Figure 1: Dynamic Changes of the Deadheading Measures
*************************************** 

use ReSTATs_replication_data_sub, clear

* Event-study regressions: month-by-treatment dummies treat_ym_id7-treat_ym_id24.
* parmby stores each regression's coefficient table in 1.dta (speed) and 2.dta (time).
parmby "reghdfe ln_deadheading_speed treat_ym_id7-treat_ym_id24,  abs(driverid date hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)", lab saving(1.dta, replace)
parmby "reghdfe ln_deadheading_time treat_ym_id7-treat_ym_id24,  abs(driverid date hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)", lab saving(2.dta, replace)

* Per-panel settings: output graph name and title, indexed by coefficient-file number.
local out1 speed
local out2 time
local ttl1 "Panel A: Deadheading Speed"
local ttl2 "Panel B: Deadheading Time"

forvalues k = 1/2 {

	use `k'.dta, clear

	* Recover the month index from the parameter name; rows without one
	* (parm2 missing after destring) are dropped.
	split parm, p("treat_ym_id")
	destring parm2, replace
	drop if parm2==.

	* Re-centre so that month index 12 becomes event time 0.
	replace parm2 = parm2 - 12
	sort parm2

	* Point estimates with 95% confidence caps. tlength(small) replaces the
	* original tlength(mall), which is not a valid size name.
	twoway scatter estimate parm2, color(navy) tline(0, lcolor(gs10) lpattern(dash)) yline(0) ///
		xlabel(#18, labsize(small)) ///
		xtitle(Months Relative to Signboard Establishment, size(small)) ///
		ytitle(Coefficient, size(small)) ///
		ylabel(-0.2(0.1)0.3, nogmax nogextend tlength(small) labsize(small)) ///
		title(`ttl`k'', color(black) size(med)) graphregion(fcolor(white)) ///
		|| line estimate parm2, color(navy) ///
		|| (rcap min95 max95 parm2, color(navy) lpattern(dash)), legend(off)
	graph save Graph "`out`k''.gph", replace
}

gr combine speed.gph time.gph, row(1) xsize(8) ysize(3) graphregion(fcolor(white))
graph export "figure1.png", as(png) name("Graph") replace

***************************************
******	Figure 2: Dynamic Differences of Taxi Pickups (Dropoffs) and Flight Arrivals (Departures)
*************************************** 

use ReSTATs_replication_data_flights_taxi.dta, clear

** Absolute difference between the hourly count of flight arrivals (departures)
** and the hourly taxi pick-ups (drop-offs) at each terminal.
gen gap_arrival=abs(air_volume_arrival_terminal - taxi_vol_terminal_arr)
gen gap_departure=abs(air_volume_dep_terminal - taxi_vol_terminal_dep)
gen gap_arrival_lag=abs(air_volume_arrival_terminal - taxi_vol_terminal_arr_lag)
gen gap_departure_lag=abs(air_volume_dep_terminal_lag - taxi_vol_terminal_dep)

* One specification for all four panels.
* NOTE(review): the original gap_arrival_lag regression absorbed only
* `terminal hour`, while the other three absorbed `terminal day hour`. That
* looks like a copy-paste omission, so `day` is now absorbed everywhere --
* confirm against the published Panel C.
local spec abs(terminal day hour) vce(cluster terminal)

parmby "reghdfe gap_arrival ym_id7-ym_id24, `spec'", lab saving(3.dta, replace)
parmby "reghdfe gap_departure ym_id7-ym_id24, `spec'", lab saving(4.dta, replace)
parmby "reghdfe gap_arrival_lag ym_id7-ym_id24, `spec'", lab saving(5.dta, replace)
parmby "reghdfe gap_departure_lag ym_id7-ym_id24, `spec'", lab saving(6.dta, replace)

* Panel titles; panel k reads coefficient file (k+2).dta and writes gap<k>.gph.
local ttl1 "Panel A: abs(arrival-pickup)"
local ttl2 "Panel B: abs(departure-dropoff)"
local ttl3 "Panel C: abs(lagged_arrival-pickup)"
local ttl4 "Panel D: abs(departure-lagged_dropoff)"

forvalues k = 1/4 {

	local src = `k' + 2
	use `src'.dta, clear

	* Recover the month index from the parameter name; rows without one are dropped.
	split parm, p("ym_id")
	destring parm2, replace
	drop if parm2==.

	* Re-centre so that month index 12 becomes event time 0.
	replace parm2 = parm2 - 12
	sort parm2

	* tlength(small) replaces the original tlength(mall), which is not a valid size name.
	twoway scatter estimate parm2, color(navy) tline(0, lcolor(gs10) lpattern(dash)) yline(0) ///
		xlabel(#18, labsize(small)) ///
		xtitle(Months Relative to TMS Establishment, size(small)) ///
		ytitle(Coefficient, size(small)) ///
		ylabel(-25(5)10, nogmax nogextend tlength(small) labsize(small)) ///
		title(`ttl`k'', color(black) size(med)) graphregion(fcolor(white)) ///
		|| line estimate parm2, color(navy) ///
		|| (rcap min95 max95 parm2, color(navy) lpattern(dash)), legend(off)
	graph save Graph "gap`k'.gph", replace
}

gr combine gap1.gph gap2.gph gap3.gph gap4.gph, row(2) xsize(8) ysize(6) graphregion(fcolor(white))
graph export "figure2.png", as(png) name("Graph") replace

***************************************
******	Figure 3: Heterogeneity Test
*************************************** 

use ReSTATs_replication_data_sub, clear

* Heterogeneity regressions. For each dimension the treat_after interactions
* treat_after_<dim>_id1 ... treat_after_<dim>_id<top> enter together, first for
* deadheading speed and then for deadheading time. Coefficient tables are written
* by parmby to consecutively numbered files, 7.dta through 22.dta.
local fe   driverid date hour previous_dropoff_district start_district end_district
local dims start_loc time_zone quantile start_district gender old_driver race_c education
local tops 4 4 5 28 2 2 3 3

local fileno = 7
local ndim : word count `dims'

forvalues d = 1/`ndim' {
	local dim : word `d' of `dims'
	local top : word `d' of `tops'
	foreach y in ln_deadheading_speed ln_deadheading_time {
		parmby "reghdfe `y' treat_after_`dim'_id1-treat_after_`dim'_id`top', abs(`fe') vce(cluster taxi_num_id)", lab saving(`fileno'.dta, replace)
		local ++fileno
	}
}


* Figure 3, Panel A: heterogeneity by pick-up terminal.
* 7.dta holds the speed estimates and 8.dta the time estimates. Both are cleaned
* by the same loop so the two halves cannot drift apart.
local src_speed 7
local src_time  8

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	replace parm="Budget Terminal" if parm=="treat_after_start_loc_id1"
	replace parm="Terminal 1" if parm=="treat_after_start_loc_id2"
	replace parm="Terminal 2" if parm=="treat_after_start_loc_id3"
	replace parm="Terminal 3" if parm=="treat_after_start_loc_id4"

	* Row 5 follows the four interaction terms (presumably the constant); not plotted.
	drop if parmseq==5

	* encode numbers the labels alphabetically: Budget Terminal=1, Terminal 1=2, ...
	encode parm, gen(parm_id)
	keep estimate_`m' min95_`m' max95_`m' parm parm_id

	if "`m'"=="speed" {
		save hetero_terminal.dta, replace
	}
}

* parm identifies one row in each file, so use a 1:1 merge (errors out on
* duplicate keys instead of silently pairing rows, as merge m:m would).
merge 1:1 parm using hetero_terminal.dta

twoway bar estimate_speed parm_id, barwidth(0.5) color(ltblue) vertical ///
	|| bar estimate_time parm_id, barwidth(0.5) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parm_id, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parm_id, color(gs5) lpattern(dash)), ///
	xlabel(1 "Budget" 2 "T1" 3 "T2" 4 "T3", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel A: by Terminal, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_terminal.gph", replace


* Figure 3, Panel B: heterogeneity by time of day.
* 9.dta holds the speed estimates and 10.dta the time estimates.
local src_speed 9
local src_time  10

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	* Row 5 follows the four interaction terms (presumably the constant); not plotted.
	drop if parmseq==5

	keep estimate_`m' min95_`m' max95_`m' parm parmseq

	if "`m'"=="speed" {
		save hetero_time.dta, replace
	}
}

* parmseq identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parmseq using hetero_time.dta

twoway bar estimate_speed parmseq, barwidth(0.5) color(ltblue) vertical ///
	|| bar estimate_time parmseq, barwidth(0.5) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parmseq, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parmseq, color(gs5) lpattern(dash)), ///
	xlabel(1 "Morning" 2 "Noon" 3 "Afternoon" 4 "Evening", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel B: by Hour of the Day, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_time.gph", replace

* Figure 3, Panel C: heterogeneity by quintile of historical frequency.
* 11.dta holds the speed estimates and 12.dta the time estimates.
local src_speed 11
local src_time  12

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	* Row 6 follows the five interaction terms (presumably the constant); not plotted.
	drop if parmseq==6

	keep estimate_`m' min95_`m' max95_`m' parm parmseq

	if "`m'"=="speed" {
		save hetero_experience.dta, replace
	}
}

* parmseq identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parmseq using hetero_experience.dta

twoway bar estimate_speed parmseq, barwidth(0.5) color(ltblue) vertical ///
	|| bar estimate_time parmseq, barwidth(0.5) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parmseq, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parmseq, color(gs5) lpattern(dash)), ///
	xlabel(1 "Quintile 1" 2 "Quintile 2" 3 "Quintile 3" 4 "Quintile 4" 5 "Quintile 5", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel C: by Quintile of Historical Frequency, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_experience.gph", replace


* Figure 3, Panel D: heterogeneity by district (28 interaction terms).
* 13.dta holds the speed estimates and 14.dta the time estimates.
local src_speed 13
local src_time  14

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	* Row 29 follows the 28 interaction terms (presumably the constant); not plotted.
	drop if parmseq==29

	keep estimate_`m' min95_`m' max95_`m' parm parmseq

	if "`m'"=="speed" {
		save hetero_district.dta, replace
	}
}

* parmseq identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parmseq using hetero_district.dta

twoway bar estimate_speed parmseq, barwidth(0.5) color(ltblue) vertical ///
	|| bar estimate_time parmseq, barwidth(0.5) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parmseq, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parmseq, color(gs5) lpattern(dash)), ///
	xlabel( 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" 6 "6" 7 "7" 8 "8" 9 "9" 10 "10" 11 "11" 12 "12" 13 "13" 14 "14" 15 "15" 16 "16" 17 "17" 18 "18" 19 "19" 20 "20" 21 "21" 22 "22" 23 "23" 24 "24" 25 "25" 26 "26" 27 "27" 28 "28", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel D: by Districts, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_district.gph", replace

* Figure 3, Panel E: heterogeneity by driver's gender.
* 15.dta holds the speed estimates and 16.dta the time estimates.
local src_speed 15
local src_time  16

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	replace parm="Female Driver" if parm=="treat_after_gender_id1"
	replace parm="Male Driver" if parm=="treat_after_gender_id2"

	* Row 3 follows the two interaction terms (presumably the constant); not plotted.
	drop if parmseq==3

	* encode numbers the labels alphabetically: Female Driver=1, Male Driver=2,
	* which matches the x-axis labels below.
	encode parm, gen(parm_id)
	keep estimate_`m' min95_`m' max95_`m' parm parm_id

	if "`m'"=="speed" {
		save hetero_gender.dta, replace
	}
}

* parm identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parm using hetero_gender.dta

twoway bar estimate_speed parm_id, barwidth(0.3) color(ltblue) vertical ///
	|| bar estimate_time parm_id, barwidth(0.3) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parm_id, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parm_id, color(gs5) lpattern(dash)), ///
	xlabel(1 "Female" 2 "Male", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel E: by Driver's Gender, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_gender.gph", replace


* Figure 3, Panel F: heterogeneity by driver's age.
* 17.dta holds the speed estimates and 18.dta the time estimates.
local src_speed 17
local src_time  18

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	replace parm="Young Driver" if parm=="treat_after_old_driver_id1"
	replace parm="Old Driver" if parm=="treat_after_old_driver_id2"

	* Row 3 follows the two interaction terms (presumably the constant); not plotted.
	drop if parmseq==3

	* Plot position follows the regressor order (id1 = Young -> 1, id2 = Old -> 2).
	* `encode parm` must NOT be used here: it numbers labels alphabetically
	* ("Old Driver"=1, "Young Driver"=2), which put the Old estimates under the
	* "Young" axis label and vice versa.
	generate parm_id = parmseq
	keep estimate_`m' min95_`m' max95_`m' parm parm_id

	if "`m'"=="speed" {
		save hetero_age.dta, replace
	}
}

* parm identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parm using hetero_age.dta

twoway bar estimate_speed parm_id, barwidth(0.3) color(ltblue) vertical ///
	|| bar estimate_time parm_id, barwidth(0.3) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parm_id, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parm_id, color(gs5) lpattern(dash)), ///
	xlabel(1 "Young" 2 "Old", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel F: by Driver's Age, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_age.gph", replace


* Figure 3, Panel G: heterogeneity by driver's race.
* 19.dta holds the speed estimates and 20.dta the time estimates. Both halves are
* relabelled by the same code so the merge key is identical in the two files.
* The original labelled group 3 "Malaysian" in the speed file but
* "Malaysian Driver" in the time file, so that row never matched in the merge.
local src_speed 19
local src_time  20

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	replace parm="Chinese Driver" if parm=="treat_after_race_c_id1"
	replace parm="Indian Driver" if parm=="treat_after_race_c_id2"
	replace parm="Malaysian Driver" if parm=="treat_after_race_c_id3"

	* Row 4 follows the three interaction terms (presumably the constant); not plotted.
	drop if parmseq==4

	* encode numbers the labels alphabetically: Chinese=1, Indian=2, Malaysian=3,
	* which matches the x-axis labels below.
	encode parm, gen(parm_id)
	keep estimate_`m' min95_`m' max95_`m' parm parm_id

	if "`m'"=="speed" {
		save hetero_race.dta, replace
	}
}

* parm identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parm using hetero_race.dta

twoway bar estimate_speed parm_id, barwidth(0.4) color(ltblue) vertical ///
	|| bar estimate_time parm_id, barwidth(0.4) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parm_id, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parm_id, color(gs5) lpattern(dash)), ///
	xlabel(1 "Chinese" 2 "Indian" 3 "Malaysian", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel G: by Driver's Race, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_race.gph", replace

* Figure 3, Panel H: heterogeneity by driver's education.
* 21.dta holds the speed estimates and 22.dta the time estimates.
local src_speed 21
local src_time  22

foreach m in speed time {

	use `src_`m''.dta, clear

	rename estimate estimate_`m'
	rename min95 min95_`m'
	rename max95 max95_`m'

	replace parm="High Education" if parm=="treat_after_education_id1"
	replace parm="Primary Education" if parm=="treat_after_education_id2"
	replace parm="Secondary Education" if parm=="treat_after_education_id3"

	* Row 4 follows the three interaction terms (presumably the constant); not plotted.
	drop if parmseq==4

	* Move "High" (row 1) behind Primary (2) and Secondary (3) so the plot order
	* is Primary, Secondary, High once parmseq is shifted down by one below.
	replace parmseq=4 if parmseq==1
	sort parmseq

	encode parm, gen(parm_id)
	keep estimate_`m' min95_`m' max95_`m' parm parm_id parmseq

	if "`m'"=="speed" {
		save hetero_education.dta, replace
	}
}

* parm identifies one row in each file, so use a 1:1 merge rather than m:m.
merge 1:1 parm using hetero_education.dta

* Shift positions 2,3,4 to 1,2,3 to line up with the x-axis labels.
replace parmseq=parmseq-1

twoway bar estimate_speed parmseq, barwidth(0.4) color(ltblue) vertical ///
	|| bar estimate_time parmseq, barwidth(0.4) color(orange_red) vertical ///
	|| (rcap min95_time max95_time parmseq, color(gs5) lpattern(dash)) ///
	|| (rcap min95_speed max95_speed parmseq, color(gs5) lpattern(dash)), ///
	xlabel(1 "Primary" 2 "Secondary" 3 "High", labsize(small)) ylabel(#8, labsize(small)) ///
	xtitle("", size(small)) ytitle(Estimated Coefficient, size(small)) ///
	title(Panel H: by Driver's Education, size(med) color(dark)) legend(off) graphregion(fcolor(white))
graph save Graph "hetero_education.gph", replace


* Assemble the eight saved heterogeneity panels into a 4-row grid and export it as Figure 3.
graph combine ///
	hetero_terminal.gph hetero_time.gph hetero_experience.gph hetero_district.gph ///
	hetero_gender.gph hetero_age.gph hetero_race.gph hetero_education.gph, ///
	rows(4) xsize(7) ysize(8) graphregion(fcolor(white))
graph export "figure3.png", as(png) name("Graph") replace

